# generate_data.py
from pathlib import Path

import numpy as np
import pandas as pd

# Default sizes of the simulated dataset. The lowercase names are kept
# unchanged so that anything already referencing them keeps working.
num_users = 50
num_courses = 20
num_interactions = 300

# Anchored to this file instead of the current working directory, so the CSV
# always lands in the sibling ``data`` directory regardless of where the
# script is launched from.
DEFAULT_OUTPUT_PATH = Path(__file__).resolve().parent.parent / "data" / "ratings.csv"


def generate_ratings(
    n_users=num_users,
    n_courses=num_courses,
    n_interactions=num_interactions,
    seed=42,
):
    """Build a reproducible table of simulated user/course ratings.

    ``n_interactions`` random (user, course, rating) triples are drawn, then
    repeated (user_id, course_id) pairs are dropped keeping the first
    occurrence, so the result usually has fewer than ``n_interactions`` rows.

    Args:
        n_users: Number of distinct users; ids are drawn from 1..n_users.
        n_courses: Number of distinct courses; ids are drawn from 1..n_courses.
        n_interactions: Number of raw interactions sampled before
            de-duplication.
        seed: Seed for the random generator; the same seed always yields the
            same table.

    Returns:
        A ``pandas.DataFrame`` with integer columns ``user_id``,
        ``course_id`` and ``rating`` (1 to 5) and a fresh 0..n-1 index.

    Raises:
        ValueError: Propagated from NumPy when ``n_users`` or ``n_courses``
            is below 1, or when ``n_interactions`` is negative.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.randint(...), without mutating NumPy's global
    # random state. The draw order (users, courses, ratings) must not change
    # or the generated data would differ.
    rng = np.random.RandomState(seed)
    data = {
        'user_id': rng.randint(1, n_users + 1, size=n_interactions),
        'course_id': rng.randint(1, n_courses + 1, size=n_interactions),
        # Upper bound is exclusive, so ratings fall in 1..5.
        'rating': rng.randint(1, 6, size=n_interactions),
    }
    return (
        pd.DataFrame(data)
        .drop_duplicates(subset=['user_id', 'course_id'])
        .reset_index(drop=True)
    )


def main(output_path=DEFAULT_OUTPUT_PATH):
    """Generate the default dataset and write it to ``output_path`` as CSV.

    Args:
        output_path: Destination file. Missing parent directories are
            created. Defaults to ``../data/ratings.csv`` relative to the
            directory containing this script.
    """
    output_path = Path(output_path)
    # to_csv does not create directories; make sure the target exists first.
    output_path.parent.mkdir(parents=True, exist_ok=True)
    generate_ratings().to_csv(output_path, index=False)
    print("✅ 模拟数据已生成到 ratings.csv")


if __name__ == "__main__":
    main()